#Import Statements
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import radviz
import plotly.express as plotlybar
import plotly.express as px
import plotly.graph_objects as go
import warnings
# Suppress every warning category for the remainder of the script.
# NOTE(review): this also hides deprecation notices raised by pandas/seaborn —
# confirm that silencing them is intended.
warnings.filterwarnings('ignore')
# Use seaborn's "darkgrid" theme for the plots that follow.
sns.set_theme(style="darkgrid")
# Load the three CSV snapshots into dataframes; the files are read in the
# same order as the names on the left-hand side.
us_counties_df, us_states_df, us_daily_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        '2021-04-21_us_counties_covid19_daily.csv',
        '2021-04-21_us_states_covid19_daily.csv',
        '2021-04-21_covid19_daily.csv',
    )
)
# Lookup workbook that is later joined to the state data on 'Postal Code'.
us_postal_code_df = pd.read_excel('US postal code.xlsx')
# Attach the postal-code lookup columns to the state data.
# 'state' is renamed to 'Postal Code' so it matches the lookup's join key;
# the left join keeps every row of the state data.
us_states_df = (
    us_states_df
    .rename(columns={'state': 'Postal Code'})
    .merge(us_postal_code_df, how='left', on=['Postal Code'])
)
us_states_df.head()
# Parse the 'date' column of both frames (format YYYYMMDD) into datetimes.
for _frame in (us_states_df, us_daily_df):
    _frame['date'] = pd.to_datetime(_frame['date'], format='%Y%m%d')
# Give the 'State/District' column the shorter name 'State'.
us_states_df = us_states_df.rename(columns={'State/District': 'State'})
us_daily_df.head()
us_states_df.head()
# Rows of the state data whose 'State' value is California.
is_california = us_states_df['State'].eq('California')
cases_CA = us_states_df.loc[is_california]
cases_CA
# Collapse the daily state rows into one row per state and turn the 'State'
# group key back into a regular column.
# numeric_only=True limits the sum to numeric columns. Without it, pandas >= 2.0
# tries to sum the datetime 'date' column and raises TypeError; pandas 1.x
# dropped non-numeric columns by default, so the result is unchanged there.
# NOTE(review): every numeric column is summed across all dates. The choropleth
# section further down takes .max() of 'positive'/'total'/'death' instead —
# confirm that summing is the intended aggregation for the ranking plots.
us_states_grp = us_states_df.groupby('State').sum(numeric_only=True).reset_index()
us_states_grp
# Horizontal bar plot of the 10 states with the largest summed 'total' value.
top_10 = us_states_grp.sort_values(by=['total'], ascending=False).head(10)
plt.figure(figsize=(15, 6))
x = top_10['total']
y = top_10['State']
# x/y must be passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally. Numeric x with categorical y yields horizontal bars.
plot = sns.barplot(x=x, y=y)
# Annotate each bar with its value; bar number i sits at vertical position i,
# and the small offset nudges the label relative to the bar centre.
for i, value in enumerate(x):
    plot.text(value, i - 0.05, f'{value:,.0f}', size=10)
plt.show()
# Compare negative and positive counts for the 5 states with the largest
# summed 'total' value (head() defaults to 5 rows).
top_5 = us_states_grp.sort_values(by=['total'], ascending=False).head()
### Generate a Barplot
plt.figure(figsize=(15, 5))
# Both calls draw on the current axes, so the 'positive' bars are painted over
# the 'negative' ones. x/y are passed by keyword because seaborn >= 0.12 no
# longer accepts them positionally.
negative = sns.barplot(x=top_5['negative'], y=top_5['State'], color='blue', label='negative')
positive = sns.barplot(x=top_5['positive'], y=top_5['State'], color='cyan', label='positive')
### Add Texts for Barplots
# Bar number i sits at vertical position i; label each bar end with its value.
for i, value in enumerate(top_5['negative']):
    negative.text(value, i - 0.05, f'{value:,.0f}', size=10)
for i, value in enumerate(top_5['positive']):
    positive.text(value, i - 0.05, f'{value:,.0f}', size=10)
# loc=4 places the legend in the lower-right corner.
plt.legend(loc=4)
plt.show()
# Horizontal bar plot of the 10 states with the largest summed
# 'hospitalizedCumulative' value.
top_10 = us_states_grp.sort_values(by=['hospitalizedCumulative'], ascending=False).head(10)
plt.figure(figsize=(15, 6))
x = top_10['hospitalizedCumulative']
y = top_10['State']
# x/y must be passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally. Numeric x with categorical y yields horizontal bars.
plot = sns.barplot(x=x, y=y)
# Annotate each bar with its value; bar number i sits at vertical position i.
for i, value in enumerate(x):
    plot.text(value, i - 0.05, f'{value:,.0f}', size=10)
plt.show()
# Compare hospitalized and death counts for the 5 states with the largest
# summed 'hospitalizedCumulative' value (head() defaults to 5 rows).
# NOTE(review): the ranking uses 'hospitalizedCumulative' but the bars show
# 'hospitalized' — confirm the two columns are meant to differ here.
top_5 = us_states_grp.sort_values(by=['hospitalizedCumulative'], ascending=False).head()
### Generate a Barplot
plt.figure(figsize=(15, 5))
# Both calls draw on the current axes, so the 'death' bars are painted over the
# 'hospitalized' ones. x/y are passed by keyword because seaborn >= 0.12 no
# longer accepts them positionally.
hospitalized = sns.barplot(x=top_5['hospitalized'], y=top_5['State'], color='green', label='hospitalized')
death = sns.barplot(x=top_5['death'], y=top_5['State'], color='red', label='death')
### Add Texts for Barplots
# Bar number i sits at vertical position i; label each bar end with its value.
for i, value in enumerate(top_5['hospitalized']):
    hospitalized.text(value, i - 0.05, f'{value:,.0f}', size=10)
for i, value in enumerate(top_5['death']):
    death.text(value, i - 0.05, f'{value:,.0f}', size=10)
# loc=4 places the legend in the lower-right corner.
plt.legend(loc=4)
plt.show()
# Frame used by the choropleth maps: for every (State, Postal Code) pair,
# the maximum value of each selected metric.
choropleth_metrics = [
    'positive',
    'total',
    'death',
    'recovered',
    'hospitalized',
    'hospitalizedCurrently',
    'onVentilatorCurrently',
    'deathIncrease',
]
per_state = us_states_df.groupby(['State', 'Postal Code'])
us_states_map = per_state[choropleth_metrics].max().reset_index()
us_states_map.describe()
# Draw one US choropleth per metric from us_states_map. Each entry is
# (column to colour by, colour scale, upper end of the colour range, title);
# the lower end of the colour range is always 1.
choropleth_specs = [
    ('positive', 'turbo', 1200000,
     'Positive COVID-19 Cases in US'),
    ('hospitalizedCurrently', 'darkmint', 15000,
     'Currently Hospitalized COVID-19 Cases in US'),
    ('death', 'portland', 20000,
     'COVID-19 Deaths in US'),
    ('deathIncrease', 'reds', 250,
     'Increase in the COVID-19 Death Rate in the US'),
    ('onVentilatorCurrently', 'teal', 300,
     'COVID-19 Patients Currently on Ventilator in the US'),
]
for metric, colour_scale, colour_max, map_title in choropleth_specs:
    # States are located by their two-letter postal code; hovering shows the
    # state name.
    figure = px.choropleth(
        us_states_map,
        locations='Postal Code',
        locationmode='USA-states',
        scope='usa',
        color=metric,
        hover_name='State',
        color_continuous_scale=colour_scale,
        range_color=[1, colour_max],
        title=map_title,
    )
    figure.show()
# Three subsets of the state data, selected by postal code (described by the
# original author as states under lockdown). 'IL' appears in both the first
# and the third subset.
postal_codes = us_states_df['Postal Code']
state_grp1 = us_states_df.loc[postal_codes.isin(['DC', 'IL', 'MD', 'MS', 'VA'])]
state_grp2 = us_states_df.loc[postal_codes.isin(['AZ', 'FL', 'NC', 'TX'])]
state_grp3 = us_states_df.loc[postal_codes.isin(['MO', 'LA', 'MA', 'IL', 'NY', 'MI', 'PA'])]
# Build one flat (State, date, metric) table per subset/metric for plotting.
def _sum_by_state_and_date(frame, metric):
    """Return `metric` summed per (State, date), with the keys as columns."""
    pivoted = frame.pivot_table(
        index=['State', 'date'],
        values=[metric],
        aggfunc=np.sum,
    )
    return pivoted.reset_index()


grp1_pt = _sum_by_state_and_date(state_grp1, 'positiveIncrease')
grp2_pt = _sum_by_state_and_date(state_grp2, 'recovered')
grp2a_pt = _sum_by_state_and_date(state_grp2, 'deathIncrease')
grp3a_pt = _sum_by_state_and_date(state_grp3, 'recovered')
grp3b_pt = _sum_by_state_and_date(state_grp3, 'death')
# Plot each pivoted group with seaborn: one line per state (distinguished by
# both colour and line style) of the chosen metric against the date.
sns.set_theme(style="darkgrid")
# Each entry is (data, metric column, facet height, facet aspect ratio).
relplot_specs = [
    (grp1_pt, "positiveIncrease", 6, 2),
    (grp2_pt, "recovered", 4, 3),
    (grp2a_pt, "deathIncrease", 4, 3),
    (grp3a_pt, "recovered", 6, 1),
    (grp3b_pt, "death", 6, 1),
]
for pivot_frame, metric, facet_height, facet_aspect in relplot_specs:
    grid = sns.relplot(
        data=pivot_frame,
        x="date",
        y=metric,
        hue='State',
        style='State',
        kind="line",
        height=facet_height,
        aspect=facet_aspect,
    )
    # Rotate and align the date tick labels so they do not overlap.
    grid.fig.autofmt_xdate()